### clean ZIP and county level median income data from US census
# use tidycensus and census API key to import data directly from census api
### last modified by: Charlotte Ambrozek
## last modified date: 3 May 2023
library(tidycensus)
library(tidyverse)
library(haven)

census_api_key("YOURCENSUSAPIKEYHERE")

countymedinc <- get_acs(geography = "county",
                  variables = c(medincome = "B19013_001",
                                numinincpovratio = "B17002_001",
                                incpovratiolt.5 = "B17002_002",
                                incpovratio.5to.74 = "B17002_003",
                                incpovratio.75to.99 = "B17002_004",
                                incpovratio1to1.24 = "B17002_005",
                                incpovratio1.25to1.49 = "B17002_006",
                                incpovratio1.5to1.74 = "B17002_007",
                                incpovratio1.75to1.84 = "B17002_008",
                                incpovratio1.85to1.99 = "B17002_009"),
                  year = 2005,
                  survey = "acs1") %>%
  select(!c(moe)) %>%
  pivot_wider(id_cols = c("GEOID", "NAME"),
              names_from = variable,
              values_from = estimate) %>%
  mutate(year = 2005,
         ct_fips = GEOID,
         numpoor = incpovratiolt.5 + incpovratio.5to.74 + incpovratio.75to.99,
         numltwic = incpovratiolt.5 + incpovratio.5to.74 + incpovratio.75to.99 + incpovratio1to1.24 + incpovratio1.25to1.49 + incpovratio1.5to1.74 + incpovratio1.75to1.84,
         numlt2fpl = incpovratiolt.5 + incpovratio.5to.74 + incpovratio.75to.99 + incpovratio1to1.24 + incpovratio1.25to1.49 + incpovratio1.5to1.74 + incpovratio1.75to1.84 + incpovratio1.85to1.99) %>%
  mutate(sharepoor = numpoor/numinincpovratio,
         shareltwic = numltwic/numinincpovratio,
         sharelt2fpl = numlt2fpl/numinincpovratio) %>%
  select(c(year, ct_fips, medincome, sharepoor, shareltwic, sharelt2fpl))

for (y in 2006:2018){
  data <- get_acs(geography = "county",
                  variables = c(medincome = "B19013_001",
                                numinincpovratio = "B17002_001",
                                incpovratiolt.5 = "B17002_002",
                                incpovratio.5to.74 = "B17002_003",
                                incpovratio.75to.99 = "B17002_004",
                                incpovratio1to1.24 = "B17002_005",
                                incpovratio1.25to1.49 = "B17002_006",
                                incpovratio1.5to1.74 = "B17002_007",
                                incpovratio1.75to1.84 = "B17002_008",
                                incpovratio1.85to1.99 = "B17002_009"),
                  year = y,
                  survey = "acs1") %>%
    select(!c(moe)) %>%
    pivot_wider(id_cols = c("GEOID", "NAME"),
                names_from = variable,
                values_from = estimate) %>%
    mutate(year = y,
           ct_fips = GEOID,
           numpoor = incpovratiolt.5 + incpovratio.5to.74 + incpovratio.75to.99,
           numltwic = incpovratiolt.5 + incpovratio.5to.74 + incpovratio.75to.99 + incpovratio1to1.24 + incpovratio1.25to1.49 + incpovratio1.5to1.74 + incpovratio1.75to1.84,
           numlt2fpl = incpovratiolt.5 + incpovratio.5to.74 + incpovratio.75to.99 + incpovratio1to1.24 + incpovratio1.25to1.49 + incpovratio1.5to1.74 + incpovratio1.75to1.84 + incpovratio1.85to1.99) %>%
    mutate(sharepoor = numpoor/numinincpovratio,
           shareltwic = numltwic/numinincpovratio,
           sharelt2fpl = numlt2fpl/numinincpovratio) %>%
    select(c(year, ct_fips, medincome, sharepoor, shareltwic, sharelt2fpl))
  countymedinc <- full_join(countymedinc, data)
}

medinc2005 <- filter(countymedinc, year == 2005) %>%
  mutate(lowinc = medincome <= quantile(medincome, probs = 0.25),
  highpov = sharepoor >= quantile(sharepoor, probs = 0.75),
  highwicelig = shareltwic >= quantile(shareltwic, probs = 0.75)) %>%
  select(c(ct_fips, lowinc, highpov, highwicelig))

countymedinc <- left_join(countymedinc, medinc2005)

save(countymedinc, file = "./data/cleaned/countyyearmedianincome.rdata")
write_dta(countymedinc, "./data/cleaned/countyyearmedianincome.dta")

zipmedinc <- get_acs(geography = "zcta",
                     variables = c(medincome = "B19013_001",
                                   numinincpovratio = "C17002_001",
                                   incpovratiolt1 = "C17002_002",
                                   incpovratio1to2 = "C17002_003",
                                   incpovratiogt2 = "C17002_004"),
                     year = 2011,
                     survey = "acs5") %>%
  select(!c(moe)) %>%
  pivot_wider(id_cols = c("GEOID", "NAME"),
              names_from = variable,
              values_from = estimate) %>%
  mutate(year = 2011,
         numlt2fpl = incpovratiolt1 + incpovratio1to2) %>%
  mutate(sharepoor = incpovratiolt1/numinincpovratio,
         sharelt2fpl = numlt2fpl/numinincpovratio) %>%
  select(c(year, GEOID, NAME, medincome, sharepoor, sharelt2fpl))

for (y in 2012:2018){
  data <- get_acs(geography = "zcta",
                  variables = c(medincome = "B19013_001",
                                numinincpovratio = "C17002_001",
                                incpovratiolt1 = "C17002_002",
                                incpovratio1to2 = "C17002_003",
                                incpovratiogt2 = "C17002_004"),
                  year = y,
                  survey = "acs5") %>%
    select(!c(moe)) %>%
    pivot_wider(id_cols = c("GEOID", "NAME"),
                names_from = variable,
                values_from = estimate) %>%
    mutate(year = y,
           numlt2fpl = incpovratiolt1 + incpovratio1to2) %>%
    mutate(sharepoor = incpovratiolt1/numinincpovratio,
           sharelt2fpl = numlt2fpl/numinincpovratio) %>%
    select(c(year, GEOID, NAME, medincome, sharepoor, sharelt2fpl))
  zipmedinc <- full_join(zipmedinc, data)
}

zipmedinc <- mutate(zipmedinc, zip = as.numeric(substr(NAME, 7, 11))) %>%
  select(year, zip, medincome, sharepoor, sharelt2fpl)

zipmedinc2011 <- filter(zipmedinc, year == 2011) %>%
  mutate(lowinc = medincome <= quantile(medincome, probs = 0.25, na.rm = TRUE),
         highpov = sharepoor >= quantile(sharepoor, probs = 0.75, na.rm = TRUE)) %>%
  select(c(zip, lowinc, highpov))

zipmedinc <- left_join(zipmedinc, zipmedinc2011)


save(zipmedinc, file = "./data/cleaned/zipyearmedianincome.rdata")
write_dta(zipmedinc, "./data/cleaned/zipyearmedianincome.dta")
